library(tidyverse)
library(ggdist)
library(ggside)
library(easystats)
library(patchwork)
library(brms)
library(lavaan)
library(ggraph)
library(tidySEM)

Perceptual Task

# Load the perceptual-task model scores, drop nuisance parameters
# (Sigma/Beta), and align sign conventions across scores.
perceptual <- read.csv("../data/scores_perceptual.csv") |> 
  select(-contains("Sigma")) |> 
  select(-contains("Beta")) |> 
  # mutate(across(ends_with("_Error"), \(x) -1*x)) |>  # So that higher score = More errors
  # NOTE(review): the blanket "_Error" flip above is commented out, so the
  # line below flips Intercept_Error WITHOUT a prior reversal - confirm this
  # one-way sign flip is intended rather than a leftover of the old pipeline.
  mutate(across(ends_with("Intercept_Error"), \(x) -1*x)) |>  # Not the intercept (re-reverse)
  mutate(across(ends_with("_RT"), \(x) -1*x)) |>  # So that higher score = Longer RTs
  mutate(across(ends_with("Intercept_RT"), \(x) -1*x))  # Not the intercept (re-reverse)

# Flag extreme observations (robust z-score > 5) as missing, one score column
# at a time; the Participant identifier is left untouched.
score_cols <- setdiff(names(perceptual), "Participant")
for (col in score_cols) {
  extreme <- check_outliers(perceptual[[col]], method = "zscore_robust", threshold = 5)
  perceptual[extreme, col] <- NA
}

# Z-standardize all scores, then strip easystats attributes back to plain numerics.
perceptual <- perceptual |>
  standardize() |>
  mutate(across(where(is.numeric), as.numeric))

Distribution

# Density of each standardized score, colored by illusion type and faceted by
# parameter x model.
# FIX: the original passed a hard-coded `linetype = 2` to geom_line(), which
# silently overrode the `linetype = Parameter` aesthetic mapped in aes() (a
# constant set in the layer always wins over a mapping). Dropping the constant
# lets the per-Parameter linetype mapping actually show in the plot.
perceptual |> 
  estimate_density(method = "kernSmooth") |> 
  separate(Parameter, into = c("Task", "Type", "Parameter", "Model")) |> 
  ggplot(aes(x = x, y = y, color = Type, linetype = Parameter)) +
  geom_line() +
  facet_grid(Parameter ~ Model)

Correlation

# Full (redundant, both triangles) correlation matrix with unadjusted p-values.
cor <- correlation::correlation(perceptual, redundant = TRUE, p_adjust = "none")

# Human-readable variable labels: drop the task prefix, underscores -> spaces.
prettify_label <- function(x) str_replace_all(str_remove(x, "Perception_"), "_", " ")

p_data <- cor |>
  mutate(Parameter1 = prettify_label(Parameter1),
         Parameter2 = prettify_label(Parameter2)) |>
  correlation::cor_sort(hclust_method = "ward.D2") |>
  cor_lower() |>
  mutate(
    # Format r without the leading zero ("0.12" -> ".12", "-0.12" -> "-.12")
    # and append significance stars.
    Text = insight::format_value(r, zap_small = TRUE, digits = 3),
    Text = str_replace(str_remove(Text, "^0+"), "^-0+", "-"),
    Text = paste0(Text, insight::format_p(p, stars_only = TRUE)),
    Parameter2 = fct_rev(Parameter2)
  )


# Lower-triangle correlation heatmap with the formatted coefficients overlaid.
ggplot(p_data, aes(x = Parameter2, y = Parameter1)) +
  geom_tile(aes(fill = r)) +
  geom_text(aes(label = Text), size = rel(2), alpha = 2/3) +
  labs(title = "Correlation Matrix of Perceptual Task Scores", x = NULL, y = NULL) +
  scale_fill_gradient2(
    low = "#2196F3", mid = "white", high = "#F44336",
    midpoint = 0, limit = c(-1, 1), space = "Lab",
    name = "Correlation", guide = "legend"
  ) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )


# heatmap(as.matrix(cor))

Factor Analysis

First Level

# How many latent factors? Aggregate the verdicts of ~19 retention methods.
rez <- parameters::n_factors(select(perceptual, -Participant), n_max=15)
rez
## # Method Agreement Procedure:
## 
## The choice of 3 dimensions is supported by 4 (21.05%) methods out of 19 (CNG, Optimal coordinates, Parallel analysis, Kaiser criterion).
plot(rez)


# Exploratory factor analysis: 3 factors (per the agreement procedure),
# oblique (oblimin) rotation, maximum-likelihood extraction, loadings sorted.
fa <- parameters::factor_analysis(select(perceptual, -Participant),
  # cor = as.matrix(cor),
  n = 3,
  rotation = "oblimin",
  fm = "mle",
  sort = TRUE
)

insight::print_md(fa)
Rotated loadings from Factor Analysis (oblimin-rotation)
Variable ML2 ML1 ML3 Complexity Uniqueness
Perception_Ebbinghaus_Intercept_RT 1.01 0.01 0.04 1.00 4.44e-03
Perception_Ebbinghaus_Difficulty_RT 1.01 5.21e-03 0.03 1.00 5.88e-03
Perception_VerticalHorizontal_Intercept_RT 0.90 -0.03 -0.03 1.00 0.15
Perception_VerticalHorizontal_Difficulty_RT 0.89 -0.05 -0.03 1.01 0.16
Perception_MullerLyer_Intercept_RT 0.89 0.03 -0.01 1.00 0.22
Perception_MullerLyer_Difficulty_RT 0.88 0.02 -0.02 1.00 0.23
Perception_VerticalHorizontal_Difficulty_Error 2.54e-03 1.00 -9.37e-03 1.00 2.71e-03
Perception_VerticalHorizontal_Intercept_Error 1.84e-03 1.00 -8.15e-03 1.00 2.70e-03
Perception_Ebbinghaus_Difficulty_Error -0.08 0.41 0.24 1.70 0.64
Perception_Ebbinghaus_Intercept_Error -0.05 0.36 0.24 1.78 0.71
Perception_MullerLyer_Intercept_Error 8.08e-03 -1.60e-03 1.00 1.00 0.02
Perception_MullerLyer_Difficulty_Error -8.86e-03 1.47e-04 0.99 1.00 5.00e-03

The 3 latent factors (oblimin rotation) accounted for 82.17% of the total variance of the original data (ML2 = 43.72%, ML1 = 20.12%, ML3 = 18.33%).


# Loading diagram of the 3-factor solution.
plot(fa) +
  theme_minimal()

# Alternative hierarchical approaches kept for reference:
# psych::omega(data, fm = "mle", nfactors=7)
# fa <- psych::fa.multi(as.matrix(cor), nfactors = 3, nfact2 = 1, n.obs = nrow(random))
# psych::fa.multi.diagram(fa)

Second Level

# Second-order analysis: first-level factor scores become the indicators.
data <- predict(fa)

# rez <- parameters::n_factors(data, rotation = "varimax")
# plot(rez)

# Single general factor over the three first-level factors.
# (With n = 1 the rotation setting is inert - there is nothing to rotate.)
fa2 <- parameters::factor_analysis(data,
  n = 1,
  rotation = "varimax",
  fm = "mle",
  sort = TRUE
)

insight::print_md(fa2)
Rotated loadings from Factor Analysis (varimax-rotation)
Variable ML1 Complexity Uniqueness
ML3 0.77 1.00 0.41
ML1 0.66 1.00 0.57
ML2 -0.42 1.00 0.82

The unique latent factor (varimax rotation) accounted for 40.10% of the total variance of the original data.

Structural Equation Model

Level 1

# Wrapper around lavaan::sem() with shared settings for every CFA/SEM fitted
# below: listwise-complete data, standard parameter bounds, ML estimation,
# and the first loading of each latent variable fixed to 1 for identification.
# `...` is forwarded to lavaan::sem().
fit_sem <- function(model, data, ...) {
  sem(model = model,
      data  = data[complete.cases(data), ],
      bounds = "standard",
      estimator = "ML",
      auto.fix.first=TRUE, 
      # NOTE(review): iter.max = 200000 but eval.max = 20000 - the asymmetry
      # looks like a missing zero; confirm which limit was intended.
      control=list(iter.max=200000, eval.max = 20000), 
      ...) 
}


# Decompose score names (Task_IllusionType_Parameter_Outcome) into facets used
# to build the candidate measurement models below.
names <- names(select(perceptual, -Participant))
data <- setNames(as.data.frame(str_split_fixed(names, "_", 4)), c("Task", "Illusion_Type", "Parameter", "Outcome"))
data$Name <- names

# Model 0 - by EFA (each variable loads on its max-loading EFA factor)
by_efa <-  efa_to_cfa(fa, threshold = "max")
# cat(by_efa)
m0 <- fit_sem(by_efa, data=perceptual)

# Model 1 - by Model
# NOTE(review): Outcome is already either "RT" or "Error", so this
# specification is identical to Model 2 below (the pasted anova output indeed
# shows m1 and m2 with the same df/AIC/chi-square) - confirm whether m1 was
# meant to group on a different facet.
by_model <-  paste(sort(paste0("p_", ifelse(str_detect(data$Outcome, "RT"), "RT", "Error"), " =~ ", data$Name)), collapse = "\n")
# cat(by_model)
m1 <- fit_sem(by_model, data=perceptual)

# Model 2 - by Component (one factor per outcome: RT vs Error)
by_component <-  paste(sort(paste0("p_", data$Outcome, " =~ ", data$Name)), collapse = "\n")
# cat(by_component)
m2 <- fit_sem(by_component, data=perceptual)

# Model 3 - by Type (one factor per illusion)
by_type <-  paste(paste(paste0(data$Illusion_Type), "=~", data$Name), collapse = "\n")
# cat(by_type)
m3 <- fit_sem(by_type, data=perceptual)

# Model 4 - by Parameter (one factor per Intercept/Difficulty parameter)
by_param <-  paste(paste(paste0(data$Parameter), "=~", data$Name), collapse = "\n")
# cat(by_param)
m4 <- fit_sem(by_param, data=perceptual)

# Model 5 - Unique (a single common factor for all scores)
m5 <-  fit_sem(paste0("p =~ ", paste(names, collapse = " + ")), data=perceptual)


# Likelihood-ratio comparison of the candidate models (lavaan sorts by df;
# note m3's df/chi-square are blank in the pasted output below).
anova(m2, m0, m1, m3, m4, m5)
## Chi-Squared Difference Test
## 
##    Df   AIC   BIC Chisq Chisq diff Df diff Pr(>Chisq)    
## m0 51  7574  7687  6645                                  
## m2 53  6717  6822  5792       -853       2          1    
## m1 53  6717  6822  5792          0       0               
## m4 53 10050 10154  9124       3332       0               
## m5 54 10273 10373  9349        225       1     <2e-16 ***
## m3     9610  9722                                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# BIC-approximated Bayes factors; m2 (by outcome component) is the reference.
test_bf(m2, m0, m1, m3, m4, m5)
## Bayes Factors for Model Comparison
## 
##     Model        BF
## [2] m0    1.28e-188
## [3] m1         1.00
## [4] m3     0.00e+00
## [5] m4     0.00e+00
## [6] m5     0.00e+00
## 
## * Against Denominator: [1] m2
## *   Bayes Factor Type: BIC approximation

Common Factor

# One factor
# Add a second-order common factor binding the RT and Error latent variables.
m2b <- fit_sem(paste0(by_component, "\np =~ p_RT + p_Error"), data=perceptual)

# LRT: does the second-order factor change fit relative to correlated factors?
anova(m2, m2b)
## Chi-Squared Difference Test
## 
##     Df  AIC  BIC Chisq Chisq diff Df diff Pr(>Chisq)
## m2b 52 6749 6858  5822                              
## m2  53 6717 6822  5792      -30.2       1          1
# Bayes factor for the second-order structure vs the correlated-factors model.
test_bf(m2, m2b)
## Bayes Factors for Model Comparison
## 
##     Model       BF
## [2] m2b   1.28e-08
## 
## * Against Denominator: [1] m2
## *   Bayes Factor Type: BIC approximation
# Absolute-fit indices side by side.
compare_performance(m2, m2b, metrics = c("RMSEA", "NFI", "CFI"))
## # Comparison of Model Performance Indices
## 
## Name |  Model | RMSEA |   NFI |   CFI
## -------------------------------------
## m2   | lavaan | 0.474 | 0.620 | 0.622
## m2b  | lavaan | 0.480 | 0.618 | 0.620

Inspection

# Standardized solution and fit indices of the retained two-factor model.
summary(m2, standardize = TRUE, fit.measures = TRUE)
## lavaan 0.6-12 ended normally after 887 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        25
## 
##   Number of observations                           481
## 
## Model Test User Model:
##                                                       
##   Test statistic                              5791.869
##   Degrees of freedom                                53
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             15236.653
##   Degrees of freedom                                66
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.622
##   Tucker-Lewis Index (TLI)                       0.529
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3333.612
##   Loglikelihood unrestricted model (H1)       -437.678
##                                                       
##   Akaike (AIC)                                6717.224
##   Bayesian (BIC)                              6821.621
##   Sample-size adjusted Bayesian (BIC)         6742.274
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.474
##   90 Percent confidence interval - lower         0.464
##   90 Percent confidence interval - upper         0.485
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.152
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   p_Error =~                                                            
##     Prcptn_Ebb_D_E    1.000                               0.495    0.520
##     Prcptn_Ebb_I_E    1.004    0.110    9.138    0.000    0.497    0.496
##     Prcptn_MlL_D_E    0.978    0.107    9.163    0.000    0.484    0.498
##     Prcptn_MlL_I_E    0.971    0.107    9.114    0.000    0.481    0.494
##     Prcptn_VrH_D_E    1.972    0.148   13.346    0.000    0.976    1.000
##     Prcptn_VrH_I_E    1.972    0.148   13.346    0.000    0.976    1.000
##   p_RT =~                                                               
##     Prcptn_Eb_D_RT    1.000                               0.983    0.996
##     Prcptn_Eb_I_RT    1.003    0.005  215.961    0.000    0.986    0.999
##     Prcptn_ML_D_RT    0.858    0.021   40.338    0.000    0.844    0.881
##     Prcptn_ML_I_RT    0.866    0.021   41.827    0.000    0.851    0.889
##     Prcptn_VH_D_RT    0.925    0.019   48.505    0.000    0.909    0.914
##     Prcptn_VH_I_RT    0.929    0.019   49.606    0.000    0.913    0.918
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   p_Error ~~                                                            
##     p_RT             -0.120    0.025   -4.887    0.000   -0.247   -0.247
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Prc_E_D_E         0.661    0.043   15.508    0.000    0.661    0.730
##    .Prc_E_I_E         0.755    0.049   15.508    0.000    0.755    0.754
##    .Pr_ML_D_E         0.710    0.046   15.508    0.000    0.710    0.752
##    .Pr_ML_I_E         0.714    0.046   15.508    0.000    0.714    0.756
##    .Pr_VH_D_E         0.001    0.001    0.756    0.450    0.001    0.001
##    .Pr_VH_I_E (lb)    0.000    0.001    0.000             0.000    0.000
##    .Pr_E_D_RT         0.008    0.001    6.827    0.000    0.008    0.008
##    .Pr_E_I_RT         0.002    0.001    1.531    0.126    0.002    0.002
##    .P_ML_D_RT         0.204    0.013   15.425    0.000    0.204    0.223
##    .P_ML_I_RT         0.193    0.013   15.418    0.000    0.193    0.210
##    .P_VH_D_RT         0.162    0.011   15.380    0.000    0.162    0.164
##    .P_VH_I_RT         0.156    0.010   15.373    0.000    0.156    0.158
##     p_Error           0.245    0.040    6.130    0.000    1.000    1.000
##     p_RT              0.966    0.063   15.381    0.000    1.000    1.000
# Path diagram of the retained model (LGL force-directed layout).
graph_sem(m2, layout = get_layout(m2, layout_algorithm = "layout_with_lgl")) 

Scores Extraction

# Latent factor scores per participant. predict(m2) only returns rows for
# complete cases (fit_sem drops incomplete rows), so re-align Participant ids
# with the same complete.cases() mask.
scores_p <- mutate(as.data.frame(predict(m2)), Participant = perceptual$Participant[complete.cases(perceptual)])

Empirical Scores

# Sanity check: correlate the model-based scores with raw empirical indices.
empirical <- read.csv("../data/preprocessed_perceptual.csv")

# Total errors per participant x illusion type.
errors_by_cell <- empirical |>
  group_by(Participant, Illusion_Type) |>
  summarize(Empirical_Errors = as.numeric(sum(Error, na.rm = TRUE)))

# Mean RT of correct trials per participant x illusion type.
rt_by_cell <- empirical |>
  filter(Error == 0) |>
  group_by(Participant, Illusion_Type) |>
  summarize(Empirical_MeanRT = mean(RT, na.rm = TRUE))

empirical <- errors_by_cell |>
  full_join(rt_by_cell, by = c("Participant", "Illusion_Type")) |>
  ungroup() |>
  pivot_wider(names_from = "Illusion_Type", values_from = c("Empirical_Errors", "Empirical_MeanRT")) |>
  left_join(scores_p, by = "Participant") |>
  left_join(perceptual, by = "Participant")

# Cross-correlation of empirical indices against model scores.
correlation::correlation(select(empirical, starts_with("Empirical")), select(empirical, -Participant, -starts_with("Empirical"))) |>
  summary() |>
  plot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Illusion Task

# Load illusion-task scores, keeping only the main Strength effects
# (difference scores, interactions, and nuisance parameters are dropped).
illusion <- read.csv("../data/scores_illusion1.csv") |>
  select(
    -contains("Diff"),
    -contains("Interaction"),
    -contains("Sigma"),
    -contains("Beta")
  )
  # mutate(across(ends_with("_Error"), \(x) -1*x))  # So that higher score = More errors

# Flag extreme values (robust z-score > 5) as missing, per score column.
for (column in setdiff(names(illusion), "Participant")) {
  illusion[check_outliers(illusion[[column]], method = "zscore_robust", threshold = 5), column] <- NA
}

# Standardize and coerce back to plain numeric vectors.
illusion <- illusion |>
  standardize() |>
  mutate(across(where(is.numeric), as.numeric))

Distribution

# Density of each standardized illusion score, faceted by model and parameter.
illusion_density <- illusion |>
  estimate_density(method = "kernSmooth") |>
  separate(Parameter, into = c("Type", "Parameter", "Model"))

ggplot(illusion_density, aes(x = x, y = y, color = Type)) +
  geom_line(linewidth = 1) +
  facet_grid(Model ~ Parameter, scales = "free")

Correlation

# Full correlation matrix (unadjusted p-values) for the illusion scores.
cor <- correlation::correlation(illusion, redundant = TRUE, p_adjust = "none")

p_data <- cor |>
  correlation::cor_sort(hclust_method = "ward.D2") |>
  cor_lower() |>
  mutate(
    # Format r without the leading zero ("0.12" -> ".12", "-0.12" -> "-.12"),
    # then append significance stars.
    Text = insight::format_value(r, zap_small = TRUE, digits = 3),
    Text = str_remove(Text, "^0+"),
    Text = str_replace(Text, "^-0+", "-"),
    Text = paste0(Text, insight::format_p(p, stars_only = TRUE)),
    Parameter2 = fct_rev(Parameter2)
  )


# Lower-triangle correlation heatmap for the illusion scores.
ggplot(p_data, aes(x = Parameter2, y = Parameter1)) +
  geom_tile(aes(fill = r)) +
  geom_text(aes(label = Text), size = rel(4), alpha = 1) +
  labs(title = "Correlation Matrix", x = NULL, y = NULL) +
  scale_fill_gradient2(
    low = "#2196F3", mid = "white", high = "#F44336",
    midpoint = 0, limit = c(-1, 1), space = "Lab",
    name = "Correlation", guide = "legend"
  ) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )


# heatmap(as.matrix(cor))

Factor Analysis

First Level

# Factor-number diagnostics for the illusion scores (up to 9 factors).
rez <- parameters::n_factors(select(illusion, -Participant), n_max=9)
rez
## # Method Agreement Procedure:
## 
## The choice of 2 dimensions is supported by 7 (43.75%) methods out of 16 (Optimal coordinates, Parallel analysis, Kaiser criterion, VSS complexity 1, VSS complexity 2, BIC, BIC (adjusted)).
plot(rez)


# EFA on the illusion scores: 2 factors (per the agreement procedure),
# oblimin rotation, maximum-likelihood extraction.
fa <- parameters::factor_analysis(select(illusion, -Participant),
  # cor = as.matrix(cor),
  n = 2,
  rotation = "oblimin",
  fm = "mle",
  sort = TRUE
)

insight::print_md(fa)
Rotated loadings from Factor Analysis (oblimin-rotation)
Variable ML2 ML1 Complexity Uniqueness
MullerLyer_Strength_RT 0.88 -0.04 1.00 0.25
Ebbinghaus_Strength_RT 0.77 -0.12 1.04 0.43
VerticalHorizontal_Strength_RT 0.72 0.19 1.14 0.39
Ebbinghaus_Strength_Error 0.45 0.12 1.15 0.75
MullerLyer_Strength_Error 0.29 0.21 1.84 0.84
VerticalHorizontal_Strength_Error 1.95e-04 1.00 1.00 5.00e-03

The 2 latent factors (oblimin rotation) accounted for 55.57% of the total variance of the original data (ML2 = 36.66%, ML1 = 18.91%).


# Loading diagram of the 2-factor solution.
plot(fa) +
  theme_minimal()

# Alternative hierarchical approaches kept for reference:
# psych::omega(data, fm = "mle", nfactors=7)
# fa <- psych::fa.multi(as.matrix(cor), nfactors = 3, nfact2 = 1, n.obs = nrow(random))
# psych::fa.multi.diagram(fa)

Second Level

# data <- predict(fa)
# 
# rez <- parameters::n_factors(na.omit(data), rotation = "oblimin")
# plot(rez)
# 
# fa2 <- parameters::factor_analysis(data,
#   n = 2,
#   rotation = "oblimin",
#   fm = "mle",
#   sort = TRUE
# )
# 
# insight::print_md(fa2)

Structural Equation Model

Level 1

# Decompose score names (IllusionType_Parameter_Outcome) into facets used to
# build the candidate measurement models.
names <- names(select(illusion, -Participant))
data <- setNames(as.data.frame(str_split_fixed(names, "_", 3)), c("Illusion_Type", "Parameter", "Outcome"))
data$Name <- names

# Model 1 - EFA (each variable loads on its max-loading EFA factor)
by_efa <- efa_to_cfa(fa, threshold = "max")
m1 <- fit_sem(by_efa, data=illusion)

# Model 2 - by Model (one factor per outcome: RT vs Error)
by_model <-  paste(sort(paste(data$Outcome, "=~", data$Name)), collapse = "\n")
# cat(by_model)
m2 <- fit_sem(by_model, data=illusion)

# Model 3 - by Type (one factor per illusion)
by_type <-  paste(paste(paste0(data$Illusion_Type), "=~", data$Name), collapse = "\n")
# cat(by_type)
m3 <- fit_sem(by_type, data=illusion)

# Model 4 - Unique (a single common factor for all scores)
m4 <-  fit_sem(paste0("p =~ ", paste(names, collapse = " + ")), data=illusion)

# Likelihood-ratio comparison of the candidate illusion models.
anova(m1, m2, m3, m4)
## Chi-Squared Difference Test
## 
##    Df  AIC  BIC Chisq Chisq diff Df diff Pr(>Chisq)    
## m3  6 7580 7643  64.3                                  
## m2  8 7598 7653  86.9       22.6       2    1.3e-05 ***
## m1  9 7611 7661 101.4       14.5       1    0.00014 ***
## m4  9 7611 7661 101.4        0.0       0               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# BIC-approximated Bayes factors; m1 (EFA-based) is the reference.
test_bf(m1, m2, m3, m4)
## Bayes Factors for Model Comparison
## 
##     Model       BF
## [2] m2       63.13
## [3] m3    1.02e+04
## [4] m4       1.000
## 
## * Against Denominator: [1] m1
## *   Bayes Factor Type: BIC approximation

Common Factor

# Add a second-order factor "i" over the three illusion-specific factors.
m5 <- fit_sem(paste0(by_type, "\ni =~ Ebbinghaus + MullerLyer + VerticalHorizontal"), data=illusion)

# LRT: hierarchical model vs the EFA-based model.
anova(m1, m5)
## Chi-Squared Difference Test
## 
##    Df  AIC  BIC Chisq Chisq diff Df diff Pr(>Chisq)    
## m5  6 7580 7643  64.3                                  
## m1  9 7611 7661 101.4       37.1       3    4.5e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bayes factor for the hierarchical model vs the EFA-based model.
test_bf(m1, m5)
## Bayes Factors for Model Comparison
## 
##     Model       BF
## [2] m5    1.02e+04
## 
## * Against Denominator: [1] m1
## *   Bayes Factor Type: BIC approximation
# Absolute-fit indices side by side.
compare_performance(m1, m5, metrics = c("RMSEA", "NFI", "CFI"))
## # Comparison of Model Performance Indices
## 
## Name |  Model | RMSEA |   NFI |   CFI
## -------------------------------------
## m1   | lavaan | 0.144 | 0.879 | 0.887
## m5   | lavaan | 0.140 | 0.923 | 0.929

Inspection

# One-row table of all fit indices for the retained hierarchical model.
display(performance(m5))
Chi2(6) p (Chi2) Baseline(15) p (Baseline) GFI AGFI NFI NNFI CFI RMSEA RMSEA CI p (RMSEA) RMR SRMR RFI PNFI IFI RNI Loglikelihood AIC BIC BIC_adjusted
64.30 < .001 835.82 < .001 0.96 0.85 0.92 0.82 0.93 0.14 [0.11, 0.17] < .001 0.06 0.06 0.81 0.37 0.93 0.93 -3774.78 7579.57 7642.58 7594.97
# Standardized estimates and fit indices for the hierarchical illusion model.
summary(m5, standardize = TRUE, fit.measures = TRUE)
## lavaan 0.6-12 ended normally after 78 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        15
## 
##   Number of observations                           493
## 
## Model Test User Model:
##                                                       
##   Test statistic                                64.303
##   Degrees of freedom                                 6
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               835.820
##   Degrees of freedom                                15
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.929
##   Tucker-Lewis Index (TLI)                       0.822
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -3774.784
##   Loglikelihood unrestricted model (H1)      -3742.632
##                                                       
##   Akaike (AIC)                                7579.568
##   Bayesian (BIC)                              7642.575
##   Sample-size adjusted Bayesian (BIC)         7594.965
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.140
##   90 Percent confidence interval - lower         0.111
##   90 Percent confidence interval - upper         0.172
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.063
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                         Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Ebbinghaus =~                                                              
##     Ebbnghs_Strn_E         1.000                               0.498    0.498
##     Ebbnghs_Str_RT         1.493    0.150    9.948    0.000    0.744    0.766
##   MullerLyer =~                                                              
##     MllrLyr_Strn_E         1.000                               0.323    0.325
##     MllrLyr_Str_RT         2.776    0.457    6.076    0.000    0.898    0.915
##   VerticalHorizontal =~                                                      
##     VrtclHrznt_S_E         1.000                               0.340    0.340
##     VrtclHrzn_S_RT         2.881    0.530    5.433    0.000    0.980    1.000
##   i =~                                                                       
##     Ebbinghaus             1.000                               0.963    0.963
##     MullerLyer             0.635    0.121    5.250    0.000    0.942    0.942
##     VerticalHrzntl         0.532    0.112    4.738    0.000    0.750    0.750
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .Ebbng_S_E         0.752    0.052   14.429    0.000    0.752    0.752
##    .Ebbn_S_RT         0.388    0.052    7.466    0.000    0.388    0.413
##    .MllrL_S_E         0.885    0.058   15.365    0.000    0.885    0.894
##    .MllL_S_RT         0.156    0.092    1.699    0.089    0.156    0.162
##    .VrtcH_S_E         0.885    0.058   15.125    0.000    0.885    0.884
##    .VrtH_S_RT (lb)    0.000    0.130    0.000             0.000    0.000
##    .Ebbinghas         0.018    0.021    0.859    0.390    0.073    0.073
##    .MullerLyr         0.012    0.011    1.070    0.285    0.113    0.113
##    .VrtclHrzn         0.051    0.014    3.752    0.000    0.438    0.438
##     i                 0.230    0.045    5.100    0.000    1.000    1.000
# graph_sem(m5, layout = get_layout(m5, layout_algorithm = "layout_with_lgl")) 
# Palette for the SEM diagram: one color per latent factor, plus a default.
colors <- c("i" = "#9C27B0", 
            "Ebbinghaus"="#3F51B5",
            "VerticalHorizontal"="#FF5722",
            "MullerLyer" = "#4CAF50",
            "black"="black")

# Edge styling for the SEM graph: drop self-edges (variances), map the sign of
# the standardized estimate to a color key, its magnitude to line width, and
# curve the paths leaving the general factor "i".
edges <- tidySEM::get_edges(m5) |>
  filter(lhs != rhs) |>
  mutate(
    color = ifelse(as.numeric(est_std) < 0, "-1", "1"),
    width = abs(as.numeric(est_std)),
    type = ifelse(from == "i", "curved", "straight"),
    text = paste(est_std, confint_std)
  )

# Node styling table: observed indicators (names containing "_") are drawn as
# small rotated black labels; latent factors get large white-on-color discs;
# the general factor "i" is largest and set in bold-italic serif.
nodes <- tidySEM::get_nodes(m5) |> 
  mutate(angle = ifelse(grepl("_", name), 90, 0),
         hjust = case_when(
           grepl("_", name) ~ 1.5,
           name == "i" ~ 0.5,
           TRUE ~ 0.5),
         size = case_when(
           grepl("_", name) ~ 1,
           name == "i" ~ 7,
           TRUE ~ 6),
         textsize = case_when(
           grepl("_", name) ~ 1,
           name == "i" ~ 3,
           TRUE ~ 1.1),
         face = case_when(
           grepl("_", name) ~ "italic",
           name == "i" ~ "bold.italic",
           TRUE ~ "plain"),
         # Color key = the factor name (strips any " - ..." suffix from label).
         color = str_remove_all(label, " - .*"),
         textcolor = ifelse(grepl("_", name), "black", "white"),
         # Display labels: pretty illusion names, short outcome tags.
         label = case_when(
           label == "MullerLyer" ~ "Müller-Lyer",
           label == "VerticalHorizontal" ~ "Vertical-\nHorizontal",
           str_detect(label, "_Error") ~ "Error",
           str_detect(label, "_RT") ~ "RT ",
           TRUE ~ label
         ),
         family = ifelse(name == "i", "serif", "sans"))


# Assemble the SEM figure: curved edges from the general factor "i", straight
# edges for factor loadings, styled nodes, and publication theming.
p_sem <- tidygraph::tbl_graph(nodes = nodes, edges = edges) |> 
  ggraph(layout = "sugiyama") +
  geom_edge_arc(aes(filter=type!="straight",
                    edge_width = width,
                    color = color,
                    label=est_std),
                # NOTE(review): this vectorized strength assumes exactly three
                # curved edges in this row order - confirm it still holds if
                # the model specification changes.
                strength = 0.03 * c(-1, 1, 1),
                angle_calc="along",
                label_dodge=unit(-0.017, "npc"),
                label_size = rel(8)) +
  geom_edge_link(aes(filter=type=="straight",
                     edge_width = width, 
                     color = color,
                     label=est_std), 
                 angle_calc="along", 
                 label_dodge=unit(-0.017, "npc"),
                 label_size = rel(8)) +
  geom_node_point(aes(shape = shape, color=color, size=size)) +
  geom_node_text(aes(label = label, angle = angle, hjust=hjust),
                 color=nodes$textcolor, 
                 fontface=nodes$face,
                 size = rel(nodes$textsize*10),
                 family=nodes$family) +
  scale_y_continuous(expand = expansion(add=c(0.5, 0.5))) +
  scale_edge_width_continuous(range=c(0.3, 6)) +
  # Dark grey for positive paths, dark red for negative ones.
  scale_edge_color_manual(values=c("1"="#263238", "-1"="#B71C1C")) +
  scale_color_manual(values=colors, guide="none") +
  scale_shape_manual(values=c("oval"="circle", "rect"="square"), guide="none") +
  scale_size_continuous(range = c(10, 95), guide="none") +
  guides(edge_colour = "none", edge_width = "none") + 
  labs(title = "Structural Equation Model")  +
  theme_graph() +
  theme(plot.title = element_text(hjust=0.5))

p_sem

ggsave("figures/figure_sem.png", p_sem, width=15, height=15)
# save(p_sem, file="models/p_sem.Rdata")

Scores Extraction

# Latent scores from the hierarchical model; rows align with complete cases
# only (fit_sem's listwise deletion), hence the complete.cases() mask.
scores_i <- mutate(as.data.frame(predict(m5)), Participant = illusion$Participant[complete.cases(illusion)]) 

Empirical Scores

# Sanity check: correlate model-based scores with raw empirical indices,
# computed on incongruent trials only.
empirical <- read.csv("../data/preprocessed_illusion1.csv")

empirical <- empirical |>  
  filter(Illusion_Effect == "Incongruent") |> 
  group_by(Participant, Illusion_Type) |> 
  # Total errors per participant x illusion type.
  summarize(Empirical_Errors = as.numeric(sum(Error, na.rm=TRUE))) |> 
  full_join(
    # Mean RT of correct incongruent trials per participant x illusion type.
    empirical |> 
      filter(Illusion_Effect == "Incongruent", Error == 0) |> 
      group_by(Participant, Illusion_Type) |> 
      summarize(Empirical_MeanRT = mean(RT, na.rm=TRUE)), by = c("Participant", "Illusion_Type")) |> 
  ungroup() |> 
  pivot_wider(names_from = "Illusion_Type", values_from = c("Empirical_Errors", "Empirical_MeanRT")) |> 
  left_join(scores_i, by = "Participant") |> 
  left_join(illusion, by = "Participant")

# Cross-correlation of empirical indices against model scores.
correlation::correlation(select(empirical, starts_with("Empirical")), select(empirical, -Participant, -starts_with("Empirical"))) |> 
  summary() |> 
  plot() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Export combined SEM scores (illusion raw scores + illusion SEM scores +
# perceptual SEM scores) for downstream analyses.
write.csv(full_join(left_join(scores_i, read.csv("../data/scores_illusion1.csv"), by = "Participant"), scores_p, by = "Participant"), "../data/scores_sem.csv", row.names = FALSE)

Validation on New Set